Here, we will run a gene set enrichment analysis (Gene set enrichment analysis) with clusterProfiler on the patient-specific lists of differentially expressed genes (DEG, RT vs CLL) ranked by descending average log2 fold-change.
library(Seurat)
library(UpSetR)
library(clusterProfiler)
library(org.Hs.eg.db)
library(ggrepel)
library(ggforce)
library(tidyverse)
# Paths
path_to_12 <- here::here("results/R_objects/6.seurat_annotated_12.rds")
path_to_19 <- here::here("results/R_objects/6.seurat_annotated_19.rds")
path_to_63 <- here::here("results/R_objects/patient_63/3.seurat_annotated.rds")
path_to_365 <- here::here("results/R_objects/6.seurat_annotated_365.rds")
path_to_3299 <- here::here("results/R_objects/6.seurat_annotated_3299.rds")
path_to_deg <- here::here("6-differential_expression_analysis/tmp/patient_specific_differential_expression_analysis_rt_vs_cll.rds")
path_to_save_rds <- here::here("6-differential_expression_analysis/tmp/patient_specific_gsea_rt_vs_cll.rds")
path_to_save_rds2 <- here::here("6-differential_expression_analysis/tmp/patient_specific_gsea_raw_rt_vs_cll.rds")
path_to_save_xlsx <- here::here("results/tables/DEA/patient_specific_gsea_rt_vs_cll.xlsx")
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "mediumorchid2", "coral2", "blueviolet",
"indianred4", "deepskyblue1", "dimgray", "deeppink1",
"green", "lightgray", "hotpink1")
# Source functions
source(here::here("bin/utils.R"))
# Thresholds
alpha <- 0.05
GO_max_total_genes <- 250
GO_min_enriched_genes <- 3
GO_p_adj_threshold <- 0.01
GO_min_odds_ratio <- 2.5
max_gs_size <- 250
min_gs_size <- 10
simplify_cutoff <- 0.75
# Load Seurat objects
paths_to_load <- c(
"12" = path_to_12,
"19" = path_to_19,
"3299" = path_to_3299,
"365" = path_to_365,
"63" = path_to_63
)
seurat_list <- purrr::map(paths_to_load, readRDS)
seurat_list
## $`12`
## An object of class Seurat
## 23326 features across 5785 samples within 1 assay
## Active assay: RNA (23326 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
##
## $`19`
## An object of class Seurat
## 23326 features across 7284 samples within 1 assay
## Active assay: RNA (23326 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
##
## $`3299`
## An object of class Seurat
## 23326 features across 6063 samples within 1 assay
## Active assay: RNA (23326 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
##
## $`365`
## An object of class Seurat
## 23326 features across 4685 samples within 1 assay
## Active assay: RNA (23326 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
##
## $`63`
## An object of class Seurat
## 13680 features across 983 samples within 1 assay
## Active assay: RNA (13680 features, 2000 variable features)
## 2 dimensional reductions calculated: pca, umap
# Plot
(umaps_annotations <- purrr::map(names(seurat_list), function(x) {
p <- plot_annotation(
seurat_list[[x]],
pt_size = 0.5,
colors_reference = color_annotations,
patient_id = x
)
p +
ggtitle(x) +
theme(plot.title = element_text(size = 12, face = "bold", hjust = 0.5))
}))
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
##
## [[5]]
# Load DEG
dea <- readRDS(path_to_deg)
set.seed(1234)
dea <- dea[names(seurat_list)]
gsea_patient_specific <- purrr::map(dea, function(df) {
gene_list <- df$avg_log2FC
names(gene_list) <- df$gene
gsea_results <- gseGO(
gene_list,
ont = "BP",
OrgDb = org.Hs.eg.db,
keyType = "SYMBOL",
minGSSize = min_gs_size,
maxGSSize = max_gs_size,
seed = TRUE
)
gsea_results <- clusterProfiler::simplify(
gsea_results,
cutoff = simplify_cutoff
)
gsea_results
})
gsea_sorted <- purrr::map(gsea_patient_specific, function(x) {
df <- x@result %>%
dplyr::filter(p.adjust < alpha) %>%
dplyr::arrange(desc(NES))
df
})
DT::datatable(gsea_sorted$`12`, options = list(scrollX = TRUE))
DT::datatable(gsea_sorted$`19`, options = list(scrollX = TRUE))
DT::datatable(gsea_sorted$`63`, options = list(scrollX = TRUE))
DT::datatable(gsea_sorted$`365`, options = list(scrollX = TRUE))
DT::datatable(gsea_sorted$`3299`, options = list(scrollX = TRUE))
# Plot
dot_plots_gsea <- purrr::map(gsea_patient_specific, function(obj) {
p <- clusterProfiler::dotplot(
obj,
x = "NES",
showCategory = 40,
color = "p.adjust"
)
p +
theme(axis.text.y = element_text(size = 9))
})
dot_plots_gsea
## $`12`
##
## $`19`
##
## $`3299`
##
## $`365`
##
## $`63`
upregulated_terms <- purrr::map(gsea_sorted, function(df) {
x <- df$ID[df$NES > 0]
x
})
downregulated_terms <- purrr::map(gsea_sorted, function(df) {
x <- df$ID[df$NES < 0]
x
})
upset_upregulated <- upset(fromList(upregulated_terms), order.by = "freq")
upset_downregulated <- upset(fromList(downregulated_terms), order.by = "freq")
upset_upregulated
upset_downregulated
As we can see, patients 12, 63 and 365 share a great deal of enriched GO terms:
go_terms <- list(
"12" = gsea_sorted$`12`$Description[gsea_sorted$`12`$NES > 0],
"63" = gsea_sorted$`63`$Description[gsea_sorted$`63`$NES > 0],
"365" = gsea_sorted$`365`$Description[gsea_sorted$`365`$NES > 0]
)
common_terms <- Reduce(intersect, go_terms)
common_terms
## [1] "oxidative phosphorylation" "mitochondrial translation" "mitochondrial gene expression" "ATP metabolic process" "cellular protein complex disassembly" "NADH dehydrogenase complex assembly" "RNA localization to Cajal body" "telomerase RNA localization to Cajal body" "telomerase RNA localization" "RNA localization to nucleus" "protein localization to nuclear body" "protein localization to Cajal body" "regulation of protein localization to Cajal body" "positive regulation of protein localization to Cajal body" "ribose phosphate biosynthetic process" "cellular amino acid metabolic process" "regulation of cellular amine metabolic process" "regulation of cellular amino acid metabolic process" "monosaccharide catabolic process"
Importantly, the majority of them are related with oxidative phosphorylation and mitochondrial gene expression and metabolism. Interestingly, the term with the lowest NES of patient 12 is BCR signaling. Thus, let us visualize those main terms and convert them to cell-specific scores with the AddModuleScore function:
selected_terms <- c("GO:0006119", "GO:0032543", "GO:0050853")
selected_titles <- c("oxidative phosphorylation", "mitochondrial translation",
"B cell receptor signaling pathway")
gsea_plots <- purrr::map(gsea_patient_specific, function(obj) {
plots <- purrr::map2(selected_terms, selected_titles, function(x, title) {
p <- gseaplot(obj, geneSetID = x, by = "runningScore", title = title)
p
})
plots
})
gsea_plots
## $`12`
## $`12`[[1]]
##
## $`12`[[2]]
##
## $`12`[[3]]
##
##
## $`19`
## $`19`[[1]]
##
## $`19`[[2]]
##
## $`19`[[3]]
##
##
## $`3299`
## $`3299`[[1]]
##
## $`3299`[[2]]
##
## $`3299`[[3]]
##
##
## $`365`
## $`365`[[1]]
##
## $`365`[[2]]
##
## $`365`[[3]]
##
##
## $`63`
## $`63`[[1]]
##
## $`63`[[2]]
##
## $`63`[[3]]
Calculate signatures:
# Define and calculate
gene_sets <- gsea_patient_specific$`12`@geneSets
saveRDS(
gene_sets,
here::here("6-differential_expression_analysis/tmp/gene_sets_gsea_analysis.rds")
)
signatures <- list(
oxphos = gene_sets$`GO:0006119`,
mitochondrial_translation = gene_sets$`GO:0032543`,
bcr_signaling = gene_sets$`GO:0050853`
)
seurat_list <- purrr::map(seurat_list, function(seurat_obj) {
seurat_obj <- AddModuleScore(
seurat_obj,
features = signatures,
name = names(signatures)
)
seurat_obj
})
# Plot
umaps_oxphos <- purrr::map(seurat_list, function(seurat_obj) {
p <- FeaturePlot(seurat_obj, "oxphos1") +
scale_color_viridis_c(option = "magma") +
ggtitle("OXPHOS Score")
p
})
umaps_oxphos
## $`12`
##
## $`19`
##
## $`3299`
##
## $`365`
##
## $`63`
umaps_mt_translation <- purrr::map(seurat_list, function(seurat_obj) {
p <- FeaturePlot(seurat_obj, "mitochondrial_translation2") +
scale_color_viridis_c(option = "magma") +
ggtitle("MT Translation Score")
p
})
umaps_mt_translation
## $`12`
##
## $`19`
##
## $`3299`
##
## $`365`
##
## $`63`
umaps_bcr <- purrr::map(seurat_list, function(seurat_obj) {
p <- FeaturePlot(seurat_obj, "bcr_signaling3") +
scale_color_viridis_c(option = "magma") +
ggtitle("BCR Signaling Score")
p
})
umaps_bcr
## $`12`
##
## $`19`
##
## $`3299`
##
## $`365`
##
## $`63`
Surprisingly, OXPHOS/MT translation seem to be largely exclusive with BCR signaling. In addition, it seems that the RT-like quiescent cluster of patient 63 could be further stratified by OXPHOS and ENO1 expression.
FeaturePlot(
seurat_list$`63`,
c("oxphos1", "bcr_signaling3"),
blend = TRUE
)
FeaturePlot(
seurat_list$`63`,
c("oxphos1", "mitochondrial_translation2"),
blend = TRUE
)
# Idents(seurat_list$`12`) <- "annotation_final"
# Idents(seurat_list$`63`) <- "annotation_final"
# richter_annotations <- c(
# "CCND2hi Richter-like quiescent",
# "CCND2lo Richter-like quiescent",
# "Richter-like proliferative",
# "Richter-like"
# )
# signatures_vars <- c("b_cell_signaling1", "oxphos2", "mitochondrial_translation3")
# signatures_titles <- c(
# "BCR Signaling Score",
# "OXPHOS Score",
# "Mitochondrial Translation Score"
# )
# color_violin <- c("#dcdddc", "#cd8899")
# signatures_violin_p <- purrr::map(c("12", "63"), function(patient) {
# seurat_obj <- seurat_list[[patient]]
# seurat_obj$is_richter_like <- ifelse(
# seurat_obj$annotation_final %in% richter_annotations,
# "Richter-like",
# "CLL-like"
# )
# ps <- purrr::map2(signatures_vars, signatures_titles, function(var, y_lab) {
# p <- seurat_obj@meta.data %>%
# mutate(sample_description2 = str_c(
# time_point,
# sample_description,
# sep = "_"
# )) %>%
# ggplot(aes_string(
# "sample_description2",
# var,
# fill = "is_richter_like",
# color = "is_richter_like"
# )) +
# geom_violin(alpha = 0.35) +
# geom_sina(size = 0.5, alpha = 1) +
# scale_color_manual(values = color_violin) +
# scale_fill_manual(values = color_violin) +
# labs(title = x, x = "", y = y_lab, color = "", fill = "") +
# theme_bw() +
# theme(
# axis.text.x = element_text(angle = 45, hjust = 0.95, color = "black"),
# plot.title = element_text(size = 12, hjust = 0.5)
# )
# p
# })
# out <- ggarrange(plotlist = ps, ncol = 3, common.legend = TRUE)
# out
# })
# seurat_list$`12`$is_richter_like <- ifelse(
# seurat_list$`12`$annotation_final %in% richter_annotations,
# "Richter-like",
# "CLL-like"
# )
# seurat_list$`12`@meta.data %>%
# mutate(sample_description2 = str_c(
# time_point,
# sample_description_FN,
# sep = "_"
# )) %>%
# ggplot(aes(sample_description2, oxphos2, fill = is_richter_like, color = is_richter_like)) +
# geom_violin(alpha = 0.35) +
# geom_sina(size = 0.5, alpha = 1) +
# scale_color_manual(values = c("#dcdddc", "#cd8899")) +
# scale_fill_manual(values = c("#dcdddc", "#cd8899")) +
# labs(x = "", y = "OXPHOS score", color = "", fill = "") +
# theme_bw() +
# theme(axis.text.x = element_text(angle = 45, hjust = 0.95, color = "black"))
saveRDS(gsea_sorted, path_to_save_rds)
saveRDS(gsea_patient_specific, path_to_save_rds2)
openxlsx::write.xlsx(gsea_sorted, path_to_save_xlsx)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] parallel stats4 stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.3 tidyverse_1.3.1 ggforce_0.3.3 ggrepel_0.9.1 ggplot2_3.3.5 org.Hs.eg.db_3.12.0 AnnotationDbi_1.52.0 IRanges_2.24.1 S4Vectors_0.28.1 Biobase_2.50.0 BiocGenerics_0.36.1 clusterProfiler_3.18.1 UpSetR_1.4.0 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] utf8_1.2.2 reticulate_1.20 tidyselect_1.1.1 RSQLite_2.2.7 htmlwidgets_1.5.3 grid_4.0.4 BiocParallel_1.24.1 Rtsne_0.15 scatterpie_0.1.6 munsell_0.5.0 codetools_0.2-18 ica_1.0-2 DT_0.18 future_1.21.0 miniUI_0.1.1.1 withr_2.4.2 colorspace_2.0-2 GOSemSim_2.16.1 highr_0.9 knitr_1.33 rstudioapi_0.13 ROCR_1.0-11 tensor_1.5 DOSE_3.16.0 listenv_0.8.0 labeling_0.4.2 polyclip_1.10-0 bit64_4.0.5 farver_2.1.0 rprojroot_2.0.2 downloader_0.4 parallelly_1.26.0 vctrs_0.3.8 generics_0.1.0 xfun_0.23 R6_2.5.0 graphlayouts_0.7.1 spatstat.utils_2.2-0 cachem_1.0.5 fgsea_1.16.0 assertthat_0.2.1 promises_1.2.0.1 scales_1.1.1 ggraph_2.0.5 enrichplot_1.10.2 gtable_0.3.0 globals_0.14.0 goftest_1.2-2 tidygraph_1.2.0 rlang_0.4.11 splines_4.0.4 lazyeval_0.2.2 spatstat.geom_2.1-0 broom_0.7.7
## [55] BiocManager_1.30.15 yaml_2.2.1 reshape2_1.4.4 abind_1.4-5 modelr_0.1.8 crosstalk_1.1.1 backports_1.2.1 httpuv_1.6.1 qvalue_2.22.0 tools_4.0.4 bookdown_0.22 ellipsis_0.3.2 spatstat.core_2.1-2 jquerylib_0.1.4 RColorBrewer_1.1-2 ggridges_0.5.3 Rcpp_1.0.7 plyr_1.8.6 rpart_4.1-15 deldir_0.2-10 pbapply_1.4-3 viridis_0.6.1 cowplot_1.1.1 zoo_1.8-9 haven_2.4.1 cluster_2.1.1 here_1.0.1 fs_1.5.0 magrittr_2.0.1 data.table_1.14.0 scattermore_0.7 openxlsx_4.2.3 DO.db_2.9 lmtest_0.9-38 reprex_2.0.0 RANN_2.6.1 fitdistrplus_1.1-5 matrixStats_0.59.0 hms_1.1.0 patchwork_1.1.1 mime_0.10 evaluate_0.14 xtable_1.8-4 readxl_1.3.1 gridExtra_2.3 compiler_4.0.4 KernSmooth_2.23-18 crayon_1.4.1 shadowtext_0.0.8 htmltools_0.5.1.1 mgcv_1.8-36 later_1.2.0 lubridate_1.7.10 DBI_1.1.1
## [109] tweenr_1.0.2 dbplyr_2.1.1 MASS_7.3-53.1 Matrix_1.3-4 cli_3.0.1 igraph_1.2.6 pkgconfig_2.0.3 rvcheck_0.1.8 plotly_4.9.4 spatstat.sparse_2.0-0 xml2_1.3.2 bslib_0.2.5.1 rvest_1.0.0 digest_0.6.27 sctransform_0.3.2 RcppAnnoy_0.0.19 spatstat.data_2.1-0 rmarkdown_2.8 cellranger_1.1.0 leiden_0.3.8 fastmatch_1.1-0 uwot_0.1.10 shiny_1.6.0 lifecycle_1.0.0 nlme_3.1-152 jsonlite_1.7.2 viridisLite_0.4.0 fansi_0.5.0 pillar_1.6.2 lattice_0.20-41 fastmap_1.1.0 httr_1.4.2 survival_3.2-10 GO.db_3.12.1 glue_1.4.2 zip_2.2.0 png_0.1-7 bit_4.0.4 stringi_1.6.2 sass_0.4.0 blob_1.2.1 memoise_2.0.0 irlba_2.3.3 future.apply_1.7.0